# Scrape second-hand house listings from 58.com

import atexit
import csv
import re

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from lxml import etree

ua = UserAgent()
headers = {'User-Agent': ua.random}


def get_house_info():
    for i in range(1, 61):
        url = 'https://cn.58.com/ershoufang/pn%s/' % str(i)

        response = requests.get(url, headers=headers)
        response.encoding = response.apparent_encoding
        html = response.text
        selector = etree.HTML(html)
        myertee = selector.xpath('//ul[@class="house-list-wrap"]/li')

        for info in myertee:
            title = info.xpath('./div[2]/h2/a/text()')[0]
            print(title)

            house_info = info.xpath('./div[2]/p[1]/span/text()')

            house_info = '-'.join(house_info)
            house_info = re.sub(r'\s+', '', house_info)
            print(house_info)

            adress = info.xpath('./div[2]/p[2]/span/a/text()')

            adress = '-'.join(adress)
            adress = re.sub(r'\s+', '', adress)
            print(adress)
            # 销售信息
            sell_info = info.xpath('./div[2]/div/span/text()')[0]
            print(sell_info)
            person = info.xpath('./div[2]/div/a/span/text()')
            if not person:
                person = info.xpath('./div[2]/div/span[3]/text()')[0]
            print(person)
            all_price = info.xpath('./div[3]/p/b/text()')[0]
            all_price_unit = info.xpath('./div[3]/p/text()')[0]
            all_price = all_price + all_price_unit
            print(all_price)
            dan_price = info.xpath('./div[3]/p[@class="unit"]/text()')[0]
            print(dan_price)

            list = [title, house_info, adress, sell_info, person, all_price,
                    dan_price]

            save_info(list)


file = open('house.csv', 'wt', encoding='utf-8', newline='')
writer = csv.writer(file)

writer.writerow(
    ['title', 'house_info', 'adress', 'sell_info', 'person', 'all_price',
     'dan_price'])


def save_info(info):
    writer.writerow(info)


get_house_info()
